<!DOCTYPE HTML>
<html lang="en" class="light" dir="ltr">
    <head>
        <!-- Book generated using mdBook -->
        <meta charset="UTF-8">
        <title>Retrieval Algorithms - PISA Guide</title>


        <!-- Custom HTML head -->
        
        <meta name="description" content="">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="theme-color" content="#ffffff">

        <link rel="icon" href="../favicon.svg">
        <link rel="shortcut icon" href="../favicon.png">
        <link rel="stylesheet" href="../css/variables.css">
        <link rel="stylesheet" href="../css/general.css">
        <link rel="stylesheet" href="../css/chrome.css">
        <link rel="stylesheet" href="../css/print.css" media="print">

        <!-- Fonts -->
        <link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
        <link rel="stylesheet" href="../fonts/fonts.css">

        <!-- Highlight.js Stylesheets -->
        <link rel="stylesheet" href="../highlight.css">
        <link rel="stylesheet" href="../tomorrow-night.css">
        <link rel="stylesheet" href="../ayu-highlight.css">

        <!-- Custom theme stylesheets -->

    </head>
    <body class="sidebar-visible no-js">
    <div id="body-container">
        <!-- Provide site root to javascript -->
        <script>
            var path_to_root = "../";
            var default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? "navy" : "light";
        </script>

        <!-- Work around some values being stored in localStorage wrapped in quotes -->
        <script>
            try {
                var theme = localStorage.getItem('mdbook-theme');
                var sidebar = localStorage.getItem('mdbook-sidebar');

                if (theme.startsWith('"') && theme.endsWith('"')) {
                    localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
                }

                if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
                    localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
                }
            } catch (e) { }
        </script>

        <!-- Set the theme before any content is loaded, prevents flash -->
        <script>
            var theme;
            try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
            if (theme === null || theme === undefined) { theme = default_theme; }
            var html = document.querySelector('html');
            html.classList.remove('light')
            html.classList.add(theme);
            var body = document.querySelector('body');
            body.classList.remove('no-js')
            body.classList.add('js');
        </script>

        <input type="checkbox" id="sidebar-toggle-anchor" class="hidden">

        <!-- Hide / unhide sidebar before it is displayed -->
        <script>
            var body = document.querySelector('body');
            var sidebar = null;
            var sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
            if (document.body.clientWidth >= 1080) {
                try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
                sidebar = sidebar || 'visible';
            } else {
                sidebar = 'hidden';
            }
            sidebar_toggle.checked = sidebar === 'visible';
            body.classList.remove('sidebar-visible');
            body.classList.add("sidebar-" + sidebar);
        </script>

        <nav id="sidebar" class="sidebar" aria-label="Table of contents">
            <div class="sidebar-scrollbox">
                <ol class="chapter"><li class="chapter-item expanded affix "><a href="../introduction.html">Introduction</a></li><li class="chapter-item expanded affix "><li class="part-title">User Guide</li><li class="chapter-item expanded "><a href="../guide/requirements.html"><strong aria-hidden="true">1.</strong> Requirements</a></li><li class="chapter-item expanded "><a href="../guide/installation.html"><strong aria-hidden="true">2.</strong> Installation</a></li><li class="chapter-item expanded "><a href="../guide/indexing-pipeline.html"><strong aria-hidden="true">3.</strong> Indexing Pipeline</a></li><li class="chapter-item expanded "><a href="../guide/parsing.html"><strong aria-hidden="true">4.</strong> Parsing</a></li><li class="chapter-item expanded "><a href="../guide/inverting.html"><strong aria-hidden="true">5.</strong> Inverting</a></li><li class="chapter-item expanded "><a href="../guide/compressing.html"><strong aria-hidden="true">6.</strong> Compressing</a></li><li class="chapter-item expanded "><a href="../guide/wand_data.html"><strong aria-hidden="true">7.</strong> "WAND" Data</a></li><li class="chapter-item expanded "><a href="../guide/querying.html"><strong aria-hidden="true">8.</strong> Querying</a></li><li class="chapter-item expanded "><a href="../guide/algorithms.html" class="active"><strong aria-hidden="true">9.</strong> Retrieval Algorithms</a></li><li class="chapter-item expanded "><a href="../guide/reordering.html"><strong aria-hidden="true">10.</strong> Document Reordering</a></li><li class="chapter-item expanded "><a href="../guide/sharding.html"><strong aria-hidden="true">11.</strong> Sharding</a></li><li class="chapter-item expanded "><a href="../guide/threshold-estimation.html"><strong aria-hidden="true">12.</strong> Threshold Estimation</a></li><li class="chapter-item expanded affix "><li class="part-title">Tutorials</li><li class="chapter-item expanded "><a href="../tutorial/robust04.html"><strong aria-hidden="true">13.</strong> Regression test for Robust04</a></li><li class="chapter-item expanded affix "><li class="part-title">CLI Reference</li><li class="chapter-item expanded "><a href="../cli/compress_inverted_index.html"><strong aria-hidden="true">14.</strong> compress_inverted_index</a></li><li class="chapter-item expanded "><a href="../cli/compute_intersection.html"><strong aria-hidden="true">15.</strong> compute_intersection</a></li><li class="chapter-item expanded "><a href="../cli/count-postings.html"><strong aria-hidden="true">16.</strong> count-postings</a></li><li class="chapter-item expanded "><a href="../cli/create_wand_data.html"><strong aria-hidden="true">17.</strong> create_wand_data</a></li><li class="chapter-item expanded "><a href="../cli/evaluate_queries.html"><strong aria-hidden="true">18.</strong> evaluate_queries</a></li><li class="chapter-item expanded "><a href="../cli/extract-maxscores.html"><strong aria-hidden="true">19.</strong> extract-maxscores</a></li><li class="chapter-item expanded "><a href="../cli/extract_topics.html"><strong aria-hidden="true">20.</strong> extract_topics</a></li><li class="chapter-item expanded "><a href="../cli/invert.html"><strong aria-hidden="true">21.</strong> invert</a></li><li class="chapter-item expanded "><a href="../cli/kth_threshold.html"><strong aria-hidden="true">22.</strong> kth_threshold</a></li><li class="chapter-item expanded "><a href="../cli/lexicon.html"><strong aria-hidden="true">23.</strong> lexicon</a></li><li class="chapter-item expanded "><a href="../cli/map_queries.html"><strong aria-hidden="true">24.</strong> map_queries</a></li><li class="chapter-item expanded "><a href="../cli/parse_collection.html"><strong aria-hidden="true">25.</strong> parse_collection</a></li><li class="chapter-item expanded "><a href="../cli/partition_fwd_index.html"><strong aria-hidden="true">26.</strong> partition_fwd_index</a></li><li class="chapter-item expanded "><a href="../cli/queries.html"><strong aria-hidden="true">27.</strong> queries</a></li><li class="chapter-item expanded "><a href="../cli/read_collection.html"><strong aria-hidden="true">28.</strong> read_collection</a></li><li class="chapter-item expanded "><a href="../cli/reorder-docids.html"><strong aria-hidden="true">29.</strong> reorder-docids</a></li><li class="chapter-item expanded "><a href="../cli/sample_inverted_index.html"><strong aria-hidden="true">30.</strong> sample_inverted_index</a></li><li class="chapter-item expanded "><a href="../cli/selective_queries.html"><strong aria-hidden="true">31.</strong> selective_queries</a></li><li class="chapter-item expanded "><a href="../cli/shards.html"><strong aria-hidden="true">32.</strong> shards</a></li><li class="chapter-item expanded "><a href="../cli/stem_queries.html"><strong aria-hidden="true">33.</strong> stem_queries</a></li><li class="chapter-item expanded "><a href="../cli/taily-stats.html"><strong aria-hidden="true">34.</strong> taily-stats</a></li><li class="chapter-item expanded "><a href="../cli/taily-thresholds.html"><strong aria-hidden="true">35.</strong> taily-thresholds</a></li><li class="chapter-item expanded "><a href="../cli/thresholds.html"><strong aria-hidden="true">36.</strong> thresholds</a></li><li class="chapter-item expanded affix "><li class="part-title">Specifications</li><li class="chapter-item expanded "><a href="../specs/lookup-table.html"><strong aria-hidden="true">37.</strong> Lookup Table</a></li></ol>
            </div>
            <div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
        </nav>

        <!-- Track and set sidebar scroll position -->
        <script>
            var sidebarScrollbox = document.querySelector('#sidebar .sidebar-scrollbox');
            sidebarScrollbox.addEventListener('click', function(e) {
                if (e.target.tagName === 'A') {
                    sessionStorage.setItem('sidebar-scroll', sidebarScrollbox.scrollTop);
                }
            }, { passive: true });
            var sidebarScrollTop = sessionStorage.getItem('sidebar-scroll');
            sessionStorage.removeItem('sidebar-scroll');
            if (sidebarScrollTop) {
                // preserve sidebar scroll position when navigating via links within sidebar
                sidebarScrollbox.scrollTop = sidebarScrollTop;
            } else {
                // scroll sidebar to current active section when navigating via "next/previous chapter" buttons
                var activeSection = document.querySelector('#sidebar .active');
                if (activeSection) {
                    activeSection.scrollIntoView({ block: 'center' });
                }
            }
        </script>

        <div id="page-wrapper" class="page-wrapper">

            <div class="page">
                                <div id="menu-bar-hover-placeholder"></div>
                <div id="menu-bar" class="menu-bar sticky">
                    <div class="left-buttons">
                        <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
                            <i class="fa fa-bars"></i>
                        </label>
                        <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
                            <i class="fa fa-paint-brush"></i>
                        </button>
                        <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
                            <li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
                        </ul>
                        <button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
                            <i class="fa fa-search"></i>
                        </button>
                    </div>

                    <h1 class="menu-title">PISA Guide</h1>

                    <div class="right-buttons">
                        <a href="../print.html" title="Print this book" aria-label="Print this book">
                            <i id="print-button" class="fa fa-print"></i>
                        </a>

                    </div>
                </div>

                <div id="search-wrapper" class="hidden">
                    <form id="searchbar-outer" class="searchbar-outer">
                        <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
                    </form>
                    <div id="searchresults-outer" class="searchresults-outer hidden">
                        <div id="searchresults-header" class="searchresults-header"></div>
                        <ul id="searchresults">
                        </ul>
                    </div>
                </div>

                <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
                <script>
                    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
                    document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
                    Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
                        link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
                    });
                </script>

                <div id="content" class="content">
                    <main>
                        <h1 id="retrieval-algorithms"><a class="header" href="#retrieval-algorithms">Retrieval Algorithms</a></h1>
<p>This is the list of the supported query processing algorithms.</p>
<h2 id="unranked"><a class="header" href="#unranked">Unranked</a></h2>
<p>PISA implements two <em>unranked</em> algorithms, meaning they return a full
list of documents matching the query in the order of their appearance in
the posting lists.</p>
<h3 id="intersection"><a class="header" href="#intersection">Intersection</a></h3>
<p>The intersection algorithm (<code>and</code>) returns only the documents that match
all query terms.</p>
<h3 id="union"><a class="header" href="#union">Union</a></h3>
<p>The union algorithm (<code>or</code>) returns all the documents that match any
query term.</p>
<h2 id="top-k-retrieval"><a class="header" href="#top-k-retrieval">Top-k Retrieval</a></h2>
<p>Top-k retrieval returns the top-k highest scored documents with respect
To the given query.</p>
<h3 id="document-at-a-time-daat"><a class="header" href="#document-at-a-time-daat">Document-at-a-time (DaaT)</a></h3>
<p>Document-at-a-time algorithms traverse one document at a time. They rely
on posting lists being sorted by document IDs, and scan them in step to
retrieve all frequencies for a document right away.</p>
<h4 id="conjunctive-processing"><a class="header" href="#conjunctive-processing">Conjunctive processing</a></h4>
<p>Conjunctive processing (<code>ranked_and</code>) returns the top <em>k</em> documents that
contain <em>all</em> of the query terms. This is an exhaustive algorithm,
meaning all documents must be scored.</p>
<h4 id="disjunctive-processing"><a class="header" href="#disjunctive-processing">Disjunctive processing</a></h4>
<p>Conjunctive processing (<code>ranked_or</code>) returns the top <em>k</em> documents that
contain <em>any</em> of the query terms. This is an exhaustive algorithm,
meaning all documents must be scored.</p>
<h4 id="maxscore"><a class="header" href="#maxscore">MaxScore</a></h4>
<p>MaxScore (<code>maxscore</code>) uses precomputed maximum partial scores for each
term to avoid calculating all scores. It is especially suitable for
longer queries (high term count), short posting lists, or high values of
<em>k</em> (number of returned top documents).</p>
<blockquote>
<p>Howard Turtle and James Flood. 1995. Query evaluation: strategies and
optimizations. Inf. Process. Manage. 31, 6 (November 1995), 831-850.
DOI=http://dx.doi.org/10.1016/0306-4573(95)00020-H</p>
</blockquote>
<h4 id="wand"><a class="header" href="#wand">WAND</a></h4>
<p>Similar to MaxScore, WAND (<code>wand</code>) uses precomputed maximum partial
scores for each term to avoid calculating all scores. Its performance is
sensitive to the term count, so it may not be the best choice for long
queries. It may also take a performance hit when <em>k</em> is very high, in
which case MaxScore may prove more efficient.</p>
<blockquote>
<p>Andrei Z. Broder, David Carmel, Michael Herscovici, Aya Soffer, and
Jason Zien. 2003. Efficient query evaluation using a two-level
retrieval process. In Proceedings of the twelfth international
conference on Information and knowledge management (CIKM '03). ACM,
New York, NY, USA, 426-434. DOI: https://doi.org/10.1145/956863.956944</p>
</blockquote>
<h4 id="blockmax-wand"><a class="header" href="#blockmax-wand">BlockMax WAND</a></h4>
<p>BlockMax WAND (<code>block_max_wand</code>) builds on top of WAND. It uses
additional precomputed scores for ranges of documents in posting lists,
which allows for skipping entire blocks of documents if their max score
is low enough.</p>
<blockquote>
<p>Shuai Ding and Torsten Suel. 2011. Faster top-k document retrieval
using block-max indexes. In Proceedings of the 34th international ACM
SIGIR conference on Research and development in Information Retrieval
(SIGIR '11). ACM, New York, NY, USA, 993-1002.
DOI=http://dx.doi.org/10.1145/2009916.2010048</p>
</blockquote>
<h4 id="variable-blockmax-wand"><a class="header" href="#variable-blockmax-wand">Variable BlockMax WAND</a></h4>
<p>Variable BlockMax WAND is the same algorithm as <code>block_max_wand</code> at
query time. The difference is in precomputing the block-max scores.
Instead having even block sizes, each block can have a different size,
to optimize the effectiveness of skips.</p>
<blockquote>
<p>Antonio Mallia, Giuseppe Ottaviano, Elia Porciani, Nicola Tonellotto,
and Rossano Venturini. 2017. Faster BlockMax WAND with Variable-sized
Blocks. In Proceedings of the 40th International ACM SIGIR Conference
on Research and Development in Information Retrieval (SIGIR '17). ACM,
New York, NY, USA, 625-634. DOI:
https://doi.org/10.1145/3077136.3080780</p>
</blockquote>
<h4 id="blockmax-maxscore"><a class="header" href="#blockmax-maxscore">BlockMax MaxScore</a></h4>
<p>BlockMax MaxScore (<code>block_max_maxscore</code>) is a MaxScore implementation
with additional block-max scores, similar to BlockMax WAND.</p>
<h4 id="blockmax-and"><a class="header" href="#blockmax-and">BlockMax AND</a></h4>
<p>BlockMax AND (<code>block_max_ranked_and</code>) is a conjunctive algorithm using
block-max scores.</p>
<h3 id="term-at-a-time-taat"><a class="header" href="#term-at-a-time-taat">Term-at-a-time (TaaT)</a></h3>
<p>Term-at-a-time algorithms traverse one posting list at a time. Thus,
they cannot rely on all frequencies for a given document being known at
the time of their processing. This requires an accumulator structure to
keep partial scores.</p>
<h4 id="disjunctive-taat-processing"><a class="header" href="#disjunctive-taat-processing">Disjunctive TaaT processing</a></h4>
<p>Disjunctive TaaT (<code>ranked_or_taat</code>) is a simple algorithm that
accumulates document scores while traversing postings one list at a
time. <code>ranked_or_taat_lazy</code> is a variant that uses an accumulator array
that initializes lazily.</p>

                    </main>

                    <nav class="nav-wrapper" aria-label="Page navigation">
                        <!-- Mobile navigation buttons -->
                            <a rel="prev" href="../guide/querying.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
                                <i class="fa fa-angle-left"></i>
                            </a>

                            <a rel="next prefetch" href="../guide/reordering.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
                                <i class="fa fa-angle-right"></i>
                            </a>

                        <div style="clear: both"></div>
                    </nav>
                </div>
            </div>

            <nav class="nav-wide-wrapper" aria-label="Page navigation">
                    <a rel="prev" href="../guide/querying.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
                        <i class="fa fa-angle-left"></i>
                    </a>

                    <a rel="next prefetch" href="../guide/reordering.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
                        <i class="fa fa-angle-right"></i>
                    </a>
            </nav>

        </div>




        <script>
            window.playground_copyable = true;
        </script>


        <script src="../elasticlunr.min.js"></script>
        <script src="../mark.min.js"></script>
        <script src="../searcher.js"></script>

        <script src="../clipboard.min.js"></script>
        <script src="../highlight.js"></script>
        <script src="../book.js"></script>

        <!-- Custom JS scripts -->


    </div>
    </body>
</html>
